# Load the data set, split it 75/25, fit a random forest that predicts the
# 'ADV' column, and print the accuracy on the held-out rows.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Columns actually fed to the model. 'Tm' is kept in X / X_train / X_test
# alongside these, but is dropped before fitting and scoring.
MODEL_COLUMNS = ['W', 'L', 'R/G', 'BA', 'OBP', 'SLG', 'OPS']

df = pd.read_csv('C:\\Users\\User\\Desktop\\result1.csv')
df.head(5)  # notebook-style preview; has no visible effect when run as a script

X = df[['Tm'] + MODEL_COLUMNS]
y = df['ADV']

# No random_state is passed, so the split (and therefore the reported
# accuracy) differs from run to run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
X_train.head(5)

# Training features without the 'Tm' column.
X_train_NoName = X_train[MODEL_COLUMNS]
X_train_NoName.head(5)

rfc = RandomForestClassifier(n_estimators=10, max_depth=5)
rfc.fit(X_train_NoName, y_train)

# Test features without the 'Tm' column, in the same column order as training.
X_test_NoName = X_test[MODEL_COLUMNS]
X_test_NoName.head(5)

test_accuracy = rfc.score(X_test_NoName, y_test)
print('The accuracy of Random Forest Classifier on testing set:', test_accuracy)
# Build a per-row comparison of predicted vs. true labels on the test set.
testResults = rfc.predict(X_test_NoName)

# X_test was produced by train_test_split and may be flagged by pandas as a
# slice of X; take an explicit copy so adding columns below neither raises
# SettingWithCopyWarning nor risks touching the original frame.
X_test = X_test.copy()

X_test['預測'] = testResults  # predicted label
X_test['真實'] = y_test       # true label (aligned on the row index)
# Difference between prediction and truth; 0 means the prediction was correct.
# NOTE(review): the subtraction assumes the 'ADV' labels are numeric -- confirm.
X_test['不同'] = X_test['預測'] - X_test['真實']

# Notebook-style views: misclassified rows, then correctly classified rows.
X_test.loc[X_test['不同'] != 0]
X_test.loc[X_test['不同'] == 0]
# Export one tree of the fitted forest to Graphviz .dot, render it to PNG with
# the external `dot` program, and display the image (IPython/Jupyter).

# The fitted model in this script is `rfc`; index 1 selects the second of its
# trees.
estimator = rfc.estimators_[1]
from sklearn.tree import export_graphviz
export_graphviz(
    estimator,
    out_file='baseball.dot',
    # Plain list of column names rather than a pandas Index, which some
    # scikit-learn releases reject during parameter validation.
    feature_names=list(X_train_NoName.columns),
    # Class names must follow the model's own class order (rfc.classes_),
    # not the order in which labels happen to appear in the test split.
    class_names=[str(label) for label in rfc.classes_],
    filled=True,
    rounded=True,
    special_characters=True,
)
from subprocess import call
# Requires the Graphviz `dot` executable on PATH.
dot_status = call(['dot', '-Tpng', 'baseball.dot', '-o', 'baseball.png', '-Gdpi=600'])
if dot_status != 0:
    # Fail loudly instead of displaying a missing or stale baseball.png.
    raise RuntimeError('Graphviz "dot" failed with exit code %d' % dot_status)
from IPython.display import Image
Image(filename='baseball.png')